
import requests

from bs4 import BeautifulSoup

import bs4



# Fetch the web page content

def getHTMLText(url):
    """Download *url* and return the response body as text.

    The request uses a 30-second timeout. Before decoding, the response
    encoding is replaced with the one ``requests`` detects from the body
    (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, invalid URL, or an HTTP error status
        reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Use the encoding detected from the content rather than the one
        # declared (or defaulted) from the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: request-level failures yield "" for the caller.
        # Unrelated errors (KeyboardInterrupt, bugs) are no longer swallowed.
        return ""



# Fill the data structure (list of rows) from the page HTML

def fillUnivList(Ulist, html):
    """Parse the first ``<tbody>`` in *html* and append its rows to *Ulist*.

    For every element child of the ``<tbody>``, the ``.string`` values of
    its ``<td>`` cells 0, 1, 3 and 2 (in that order) are appended to *Ulist*
    as a four-item list. ``printUnivList`` labels these positions as rank,
    school name, total score and province/city.

    Args:
        Ulist: List that is extended in place with one entry per table row.
        html: HTML source of the page; may be empty.

    Returns:
        None. If *html* contains no ``<tbody>``, *Ulist* is left untouched.
        Rows with fewer than four ``<td>`` cells are skipped.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # Nothing to parse, e.g. the download failed and html is "".
        return
    for tr in tbody.children:
        # .children also yields text nodes (whitespace between rows);
        # only Tag nodes can be table rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")  # calling a Tag is shorthand for tr.find_all("td")
        if len(tds) < 4:
            # Not a full data row; indexing below would raise IndexError.
            continue
        # A cell's .string is None when the cell has more than one child node.
        Ulist.append([tds[0].string, tds[1].string, tds[3].string, tds[2].string])



# Print the results

def printUnivList(Ulist, num):
    """Print a header line followed by at most *num* rows of *Ulist*.

    Each line holds four centred, tab-separated columns of width 10.

    Args:
        Ulist: Sequence of rows; the first four items of each row are
            printed in order. ``None`` items are printed as empty cells.
        num: Maximum number of rows to print. If *Ulist* holds fewer rows,
            all of them are printed; a value of zero or less prints only
            the header.

    Returns:
        None. Output goes to standard output.
    """
    # chr(12288) is the full-width (ideographic) space. It is passed as
    # argument 4 and used as the fill character of the name column so that
    # Chinese text stays aligned.
    fill = chr(12288)
    tplit = "{0:^10}\t{1:{4}^10}\t{2:^10}\t{3:^10}"
    print(tplit.format("排名", "学校名称", "总分", "省市", fill))
    # Slicing (instead of indexing range(num)) tolerates short lists;
    # max() keeps a negative num from being read as "all but the last n".
    for u in Ulist[:max(num, 0)]:
        # None does not accept a format spec such as "^10", so map it to "".
        cells = ["" if v is None else str(v) for v in u[:4]]
        print(tplit.format(*cells, fill))



# Main entry point

def main():
    """Download the ranking page, parse its table and print the first 20 rows."""
    source_url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html"
    page = getHTMLText(source_url)

    rows = []
    fillUnivList(rows, page)  # fills rows in place

    printUnivList(rows, 20)  # show 20 universities

# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger a network request.
if __name__ == "__main__":
    main()
